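# Compiles all cleaned TESS study datasets into one pooled dataset
# (reads every file in data/analysis/cleaned/, writes pooled_data.csv one level up).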
library(tidyverse)
library(broom)
library(estimatr)
library(xtable)
library(Hmisc)
library(rio)

##### LOAD AND POOL THE CLEANED STUDY FILES #####
# Set the working directory to the folder holding the cleaned study files.
# NOTE: the export step at the end of this script writes to a path relative
# to this directory ("../pooled_data.csv"), so this setwd() call must stay.
setwd("data/analysis/cleaned/")

# List the directory once, up front, so the set of files cannot change
# between counting them and reading them.
files <- list.files()

# Fail early with a clear message when the directory is empty, rather than
# passing a missing file name on to read_csv().
if (length(files) == 0) {
  stop("No files found in ", getwd(), call. = FALSE)
}

# Read every file into its own data frame: one list element per file, in the
# order returned by list.files().
list_of_dfs <- lapply(files, read_csv)

# Bind the per-file data frames into one meta-dataset. bind_rows() matches
# columns by name and fills columns absent from a given file with NA.
all_data <- bind_rows(list_of_dfs)

# Derive the year of each study from its StudyId range, then compute each
# respondent's birth year as that year minus their reported AGE.
# A StudyId that falls in none of the ranges gets a missing (NA) study_year,
# and therefore a missing birth_year as well.
all_data <- all_data |>
  mutate(
    study_year = case_when(
      between(StudyId, 1, 11) ~ 2017,
      between(StudyId, 12, 29) ~ 2018,
      between(StudyId, 30, 50) ~ 2019,
      between(StudyId, 51, 56) ~ 2020
    ),
    birth_year = study_year - AGE
  )

# Remove duplicate respondents, which arise because some studies were run on
# the same survey: 1, 2, and 5 shared time; 33, 35, and 36 shared time;
# 40 and 43 shared time. Studies 2, 5, 35, 36, and 40 are dropped, leaving
# one study (1, 33, 43) from each shared group.
# Rows with a missing StudyId are dropped too: the explicit is.na() guard
# reproduces what chained `StudyId != x` comparisons do inside filter().
all_data <- all_data %>%
  filter(
    !is.na(StudyId),
    !(StudyId %in% c(2, 5, 35, 36, 40))
  )

# Recode the numeric survey codes into labelled analysis variables.
# In every case_when() below, a value that matches no condition (including a
# missing value) becomes NA unless a branch handles it explicitly.
all_data <- all_data %>%
  mutate(
    # Education collapsed to four levels. The leading digit in each label is
    # presumably there to keep the levels in order when sorted -- confirm.
    EDUC4 = case_when(
      EDUC < 9 ~ "1No HS degree",
      EDUC == 9 ~ "2HS degree",
      between(EDUC, 10, 11) ~ "3Some college",
      EDUC > 11 ~ "4BA or higher"
    ),
    # Employment status collapsed to four groups.
    EMPLOY4 = case_when(
      EMPLOY < 3 ~ "Employed",
      between(EMPLOY, 3, 4) ~ "Unemployed",
      EMPLOY == 5 ~ "Retired",
      EMPLOY > 5 ~ "Disabled or other"
    ),
    # Housing type labels for codes 1-5.
    HOME_TYPE5 = case_when(
      HOME_TYPE == 1 ~ "Detached house",
      HOME_TYPE == 2 ~ "Attached house",
      HOME_TYPE == 3 ~ "Apartment building",
      HOME_TYPE == 4 ~ "Mobile home",
      HOME_TYPE == 5 ~ "Boat or RV"
    ),
    # Race/ethnicity labels for codes 1-6.
    RACE = case_when(
      RACETHNICITY == 1 ~ "White",
      RACETHNICITY == 2 ~ "Black",
      RACETHNICITY == 3 ~ "Other",
      RACETHNICITY == 4 ~ "Hispanic",
      RACETHNICITY == 5 ~ "Multiracial, non-Hispanic",
      RACETHNICITY == 6 ~ "Asian"
    ),
    # Marital status labels for codes 1-6.
    MARITAL6 = case_when(
      MARITAL == 1 ~ "Married",
      MARITAL == 2 ~ "Widowed",
      MARITAL == 3 ~ "Divorced",
      MARITAL == 4 ~ "Separated",
      MARITAL == 5 ~ "Never Married",
      MARITAL == 6 ~ "Living with Partner"
    ),
    # Party identification: the codes 98 and -1 are converted to NA first so
    # they cannot leak into the strength score or the three-way grouping.
    PartyID7 = na_if(PartyID7, 98),
    PartyID7 = na_if(PartyID7, -1),
    # Strength = distance of the 7-point score from its midpoint (4).
    PartyIDstrength = abs(PartyID7 - 4),
    # Three-way party grouping; anything below/above the midpoint counts as
    # Democrat/Republican, and missing party ID is labelled "Unknown".
    PID3 = case_when(
      PartyID7 < 4 ~ "Democrat",
      PartyID7 == 4 ~ "Independent",
      PartyID7 > 4 ~ "Republican",
      is.na(PartyID7) ~ "Unknown"
    ),
    # Household internet access (0/1 indicator).
    INTERNET2 = case_when(
      INTERNET == 0 ~ "1No internet",
      INTERNET == 1 ~ "2Internet"
    ),
    # Any non-missing GENDER value other than 1 is labelled "Female";
    # a missing GENDER stays NA.
    GENDER2 = ifelse(GENDER == 1, "Male", "Female"),
    # Telephone service labels for codes 1-5.
    PHONESERVICE5 = case_when(
      PHONESERVICE == 1 ~ "Landline only",
      PHONESERVICE == 2 ~ "Mostly cell-phone",
      PHONESERVICE == 3 ~ "Mostly landline",
      PHONESERVICE == 4 ~ "Cell-phone only",
      PHONESERVICE == 5 ~ "No telephone"
    ),
    # Quantile groups via Hmisc::cut2 (4 for age, 5 for income). The columns
    # are referenced through the data mask rather than via `all_data$...`, so
    # the vectors always match the rows this mutate() is operating on.
    AGE4 = cut2(AGE, g = 4),
    INCOME5 = cut2(INCOME, g = 5),
    # The code 98 on ATTEND is converted to NA.
    ATTEND = na_if(ATTEND, 98),
    # TNRFU == 0 is labelled "Eager"; any other non-missing value is
    # labelled "Reluctant", and a missing TNRFU stays NA.
    NRFU = ifelse(TNRFU == 0, "Eager", "Reluctant")
  )

# Write the pooled dataset to disk. The path is relative to the current
# working directory, so the file is created one level above it.
export(x = all_data, file = "../pooled_data.csv")

